clear all

********************************************************************************
* Build the PCSA-level and ZCTA-level inputs used by the main analysis
********************************************************************************

* PCSA totals: add up the ZCTA-level counts within each PCSA and keep the
* first non-missing state value found in the PCSA.
use "ZCTALevelProcessed_R2_input.dta", clear
collapse (sum)     tot_pop n_urgentcare n_hospitals          ///
                   n_hospaffucc_geo n_hospaffucc_geo2        ///
         (firstnm) state,                                    ///
         by(pcsa)
save "PCSALevelData_Input.dta", replace

* ZCTA population lookup: only the ZCTA identifier and its population are
* loaded from the same input file.
use zcta5a tot_pop using "ZCTALevelProcessed_R2_input.dta", clear
save "zcta_pop.dta", replace

* Convert the PCSA shapefile into Stata datasets: uspcsa_db (attribute table)
* and uspcsa_coord (polygon coordinates). Centroids are generated with the
* stub "center" (the script later reads x_center / y_center) and the
* generated polygon id variable is named pcsa.
shp2dta using "pcsa_shapefiles/uspcsav31_HRSA.shp",  ///
    database(uspcsa_db)                              ///
    coordinates(uspcsa_coord)                        ///
    gencentroids(center)                             ///
    genid(pcsa)                                      ///
    replace

* The ZCTA and county conversions below are disabled.
* NOTE(review): uszcta_db.dta is loaded further down in this script, so it
* must already exist on disk (presumably from an earlier run of the ZCTA
* conversion) - confirm before running from a clean directory.
*shp2dta using "zcta_shapefiles/tl_2015_us_zcta510.shp", data(uszcta_db) coor(uszcta_coord) replace genc(center) genid(zcta5a)

*shp2dta using "county_shapefiles/cb_2018_us_county_500k.shp", data(uscounty_db) coor(uscounty_coord) replace genc(center) genid(county)

* Population-weighted PCSA centroids: average the ZCTA centroids within each
* PCSA, weighting every ZCTA by its population.
use "uszcta_db.dta", clear

* The existing zcta5a variable is moved to "id" so that the name zcta5a can
* hold the numeric version of ZCTA5CE10, which is the merge key below.
rename zcta5a id
destring ZCTA5CE10, generate(zcta5a)

* Keep only ZCTAs found in both the ZCTA-to-PCSA file and the population file.
merge 1:1 zcta5a using "zcta_pcsa.dta", keep(match) nogenerate
merge 1:1 zcta5a using "zcta_pop.dta",  keep(match) nogenerate

* Rescale the population before it is used as the weight.
replace tot_pop = tot_pop * 10000 * 1000
gcollapse (mean) x_center y_center [weight=tot_pop], by(pcsa)
save "pcsa_wpop_centroids.dta", replace

* Point files for the distance search.
*
* base_dist.dta : one row per PCSA present in both the shapefile attribute
*                 table and the weighted-centroid file, with variables
*                 pcsa, x_center, y_center.
* neighbors.dta : exactly the same rows, with the variables renamed to
*                 pcsb, x_centroid, y_centroid so they do not collide with
*                 the base file's names.
use "uspcsa_db.dta", clear
keep PCSA
rename PCSA pcsa
destring pcsa, replace
merge 1:1 pcsa using "pcsa_wpop_centroids.dta", keep(3) nogen
save "base_dist.dta", replace

* The neighbour file is built from the data still in memory instead of
* repeating the load/keep/rename/destring/merge steps, so the two files are
* guaranteed to contain the same points.
rename (pcsa x_center y_center) (pcsb x_centroid y_centroid)
save "neighbors.dta", replace

* Pairwise distances between PCSA centroids. geonear is run in long form with
* distances in miles and a 100-unit "within" radius, skipping each PCSA's
* match with itself; the resulting distance variable (mi_to_pcsb) is used by
* the market definition further down.
use "base_dist.dta", clear
geonear pcsa y_center x_center using "neighbors",   ///
    neighbors(pcsb y_centroid x_centroid)           ///
    ignoreself                                      ///
    long                                            ///
    miles                                           ///
    within(100)
save "pcsa_dist_input.dta", replace

* Attach PCSA-level attributes to both ends of every (pcsa, pcsb) pair.

* Base side: attributes of pcsa, under their original names.
use "PCSALevelData_Input.dta", clear
keep pcsa tot_pop n_urgentcare n_hospitals state n_hospaffucc_geo
merge 1:m pcsa using "pcsa_dist_input.dta", keep(match) nogenerate
save "pcsa_dist.dta", replace

* Neighbour side: the same attributes for pcsb, prefixed with b_.
use "PCSALevelData_Input.dta", clear
keep pcsa tot_pop n_urgentcare n_hospitals state n_hospaffucc_geo
rename (pcsa tot_pop   n_urgentcare   n_hospitals   state   n_hospaffucc_geo)   ///
       (pcsb b_tot_pop b_n_urgentcare b_n_hospitals b_state b_n_hospaffucc_geo)
merge 1:m pcsb using "pcsa_dist.dta", keep(match) nogenerate

* Display both identifiers with the same wide numeric format.
format pcsa pcsb %14.0g
save "pcsa_dist.dta", replace

// Market definition, step 1: distance and population screen

* A PCSA is removed when at least one neighbour within 35 miles has a
* population that is greater than or equal to its own.
use "pcsa_dist.dta", clear
capture drop todropi

* Both populations are rescaled by the same factor before being compared.
replace tot_pop   = tot_pop   * 10000
replace b_tot_pop = b_tot_pop * 10000

* Pair-level flag, then the PCSA-level maximum of that flag.
generate todropi = (mi_to_pcsb <= 35 & b_tot_pop >= tot_pop)
egen todropa = max(todropi), by(pcsa)
drop if todropa != 0

* Reduce to one row per surviving PCSA.
keep pcsa
duplicates drop
save "market_definition.dta", replace

* Market definition, step 2: keep only PCSAs whose ZCTAs all report the same
* state value.
use "ZCTALevelProcessed_R2_input.dta", clear
merge m:1 pcsa using "market_definition.dta", keep(match) nogenerate

* One row per distinct (pcsa, state) combination.
keep pcsa state
duplicates drop

* Count the distinct state values observed for each PCSA.
bysort pcsa state: generate ind = (_n == 1)
egen num_state = total(ind), by(pcsa)

* Collapse to one row per PCSA and discard those spanning several states.
duplicates drop pcsa, force
drop if num_state > 1
keep pcsa
save "market_definition.dta", replace

* Map of the PCSAs, with a point at the centroid of every PCSA that is in the
* market-definition sample.
use "uspcsa_db.dta", clear

* "id" keeps the existing pcsa variable (used as the spmap polygon id below);
* pcsa becomes the numeric PCSA code used as the merge key.
rename pcsa id
rename PCSA pcsa
destring pcsa, replace

merge 1:1 pcsa using "market_definition.dta", generate(robust3)
generate in_sample = (robust3 == 3)

* Restrict to centroids inside the latitude 25..49 / longitude -124..-66 box;
* rows with a missing centroid are dropped as well.
drop if !inrange(y_center, 25, 49)
drop if !inrange(x_center, -124, -66)

* Blank the centroid of every PCSA that did not match the sample so that no
* point is drawn for it.
foreach coord in x_center y_center {
    replace `coord' = . if robust3 != 3
}

spmap using "uspcsa_coord.dta", id(id)                          ///
    clmethod(unique) ocolor(gs10) legend(size(medium))          ///
    fcolor(gs12)                                                ///
    point(xcoord(x_center) ycoord(y_center) size(vsmall))
graph export "pcsa_map.png", replace
graph export "pcsa_map.pdf", replace

// Organize data: aggregate the ZCTA file to one row per PCSA

clear all

* Part 1 - sums of counts, first non-missing state, and the maximum of
* ucc_regulation within each PCSA; held in a temporary file.
use "ZCTALevelProcessed_R2_input.dta", clear
collapse (sum)     tot_pop n_urgentcare n_hospitals                 ///
                   n_hospaffucc_geo n_hospaffucc_geo2 any_emergency ///
                   hisp black other white og_*                      ///
         (firstnm) state                                            ///
         (max)     ucc_regulation,                                  ///
         by(pcsa)
tempfile base
save `base'

* Part 2 - population-weighted means of the area characteristics, then the
* sums from part 1 are merged back on.
use "ZCTALevelProcessed_R2_input.dta", clear
collapse (mean) rural income_pc cms_wage_index median_value         ///
                median_gross_rent con_intensity pop_growth          ///
                rpl_themes                                          ///
         [w=tot_pop],                                               ///
         by(pcsa)
merge 1:1 pcsa using `base', nogenerate
save "PCSALevelData_Raw.dta", replace

* Restrict the raw PCSA data to the defined markets and turn the summed
* counts into population shares.
use "PCSALevelData_Raw.dta", clear
merge 1:1 pcsa using "market_definition.dta", keep(match) nogenerate

* Education groupings and working copies of the og_* counts.
generate le_highschool  = og_less_highschool + og_highschool
generate gte_highschool = og_highschool + og_some_college + og_bachelor
generate female         = og_female
generate age_65         = og_age_65
generate uninsured      = og_uninsured

rename (hisp     black         other         white)         ///
       (hispanic nonhisp_black nonhisp_other nonhisp_white)

* tot_pop is multiplied by 10000*1000 only while the shares are computed and
* is scaled back afterwards.
* NOTE(review): presumably tot_pop is stored in units of 10 million people -
* confirm against the input file.
replace tot_pop = tot_pop * 10000 * 1000
local vars "hispanic nonhisp_white nonhisp_black nonhisp_other female age_65 uninsured le_highschool gte_highschool"
foreach share of local vars {
    replace `share' = `share' / tot_pop
}
replace tot_pop = tot_pop / 10000 / 1000

drop if income_pc == .

* Top-coded counts: each cat_* variable is the count capped at the given
* maximum.
generate cat_ucc  = min(n_urgentcare, 3)
generate cat_hosp = min(n_hospitals, 1)

* cat_hosp2: 0 when there is at most one hospital, 1 when there are two or more.
generate cat_hosp2 = n_hospitals
replace  cat_hosp2 = 0 if cat_hosp2 <= 1
replace  cat_hosp2 = 1 if cat_hosp2 >= 2

generate cat_aucc = min(n_hospaffucc_geo, 1)

* Combined count of urgent care centers and hospital-affiliated ones,
* capped at 4.
generate n_ucc_aucc = n_urgentcare + n_hospaffucc_geo
generate cat_both   = min(n_ucc_aucc, 4)

* Two extra copies of the population variable.
generate tot_pop2 = tot_pop
generate tot_pop3 = tot_pop

* Presence indicators.
generate any_hosp = (n_hospitals > 0)
generate any_aucc = (n_hospaffucc_geo > 0)
generate any_ucc  = (n_urgentcare > 0)

* Above-median indicators for income per capita, rpl_themes (high_svi) and
* the uninsured share. Each indicator is 1 when the variable is at or above
* its sample median and 0 when it is below.
*
* Two details matter here:
*   - The median is taken directly from r(p50) instead of being copied into
*     a float variable first, so the comparison is not distorted by rounding
*     the median to float precision.
*   - Stata ranks missing values above every number, so an unconditional
*     "x >= median" would flag observations with a missing x as high. The
*     "if !missing()" qualifier leaves the indicator missing for those rows.

quietly summarize income_pc, detail
generate high_income = (income_pc >= r(p50)) if !missing(income_pc)

quietly summarize rpl_themes, detail
generate high_svi = (rpl_themes >= r(p50)) if !missing(rpl_themes)

quietly summarize uninsured, detail
generate high_uninsured = (uninsured >= r(p50)) if !missing(uninsured)

* Keep the raw counts under an og_ prefix and reuse the original names for
* the categorical versions built above.
rename (n_hospitals    n_hospaffucc_geo    n_urgentcare)      ///
       (og_n_hospitals og_n_hospaffucc_geo og_n_urgentcare)

generate n_hospitals      = cat_hosp2
generate n_hospaffucc_geo = cat_aucc
generate n_urgentcare     = cat_ucc

* Variable labels for the analysis dataset.

* Facility counts / presence
label variable n_hospitals      "Additional hospital presence"
label variable n_urgentcare     "Number of UCCs"
label variable n_hospaffucc_geo "Number of AUCCs"

* Area characteristics
label variable rural            "Rural"
label variable income_pc        "Income per capita"
label variable cms_wage_index   "CMS wage index"
label variable con_intensity    "CON laws"

* Population composition
label variable hispanic         "Hispanic"
label variable nonhisp_white    "White"
label variable nonhisp_black    "Black"
label variable gte_highschool   "High school or more"
label variable age_65           "Age 65 or more"
label variable uninsured        "Uninsured"

save "PCSALevelData_v3.dta", replace
